In [48]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
In [49]:
df = pd.read_csv('log.csv', parse_dates=['Joined', 'Left', 'Time'])
df['Time'] = df['Left'] - df['Joined']
df = df[df['Member Name'] != 'Sauron']
df
Out[49]:
Member Id Member Name Joined Left Time
0 210743674525450240 stratos1805 2021-04-27 02:21:53.125370 2021-04-27 02:33:41.455374 0 days 00:11:48.330004
1 132415133711466496 סדאם חוסיין ז"ל 2021-04-27 02:21:53.125370 2021-04-27 02:35:55.345365 0 days 00:14:02.219995
2 192642893159202816 MrStormagedon 2021-04-27 02:21:53.125370 2021-04-27 03:10:15.058915 0 days 00:48:21.933545
3 193006567455457280 Cardi Biton 2021-04-27 12:44:58.546623 2021-04-27 12:56:30.695541 0 days 00:11:32.148918
4 192986627998613504 rone 2021-04-27 13:55:12.166887 2021-04-27 14:14:01.761058 0 days 00:18:49.594171
... ... ... ... ... ...
524 192642893159202816 MrStormagedon 2021-05-26 21:47:46.678238 2021-05-27 00:32:03.773733 0 days 02:44:17.095495
525 193006567455457280 Cardi Biton 2021-05-26 22:23:08.818131 2021-05-27 00:48:07.561674 0 days 02:24:58.743543
526 132415133711466496 סדאם חוסיין ז"ל 2021-05-26 21:05:15.638072 2021-05-27 00:48:13.596533 0 days 03:42:57.958461
527 193052172219318274 NightSeeker 2021-05-26 22:43:29.837002 2021-05-27 00:48:13.596533 0 days 02:04:43.759531
528 192643774562828288 October 2021-05-27 12:20:28.581935 2021-05-27 12:20:32.596785 0 days 00:00:04.014850

527 rows × 5 columns

In [50]:
min_datetime = df['Joined'].min()
max_datetime = df['Left'].max()
min_date, max_date = min_datetime.date(), max_datetime.date()
df = df.groupby('Member Name').filter(lambda x: x['Time'].sum().total_seconds() / 3600 > 5)
unique_members = df['Member Name'].unique().tolist()

colormap = plt.cm.tab20(np.linspace(0, 1, len(unique_members)))
palette = [matplotlib.colors.to_hex(c) for c in colormap]

print(f'Data collected over {max_datetime - min_datetime} with {len(unique_members)} unique members')
print(unique_members)
Data collected over 30 days 09:58:39.471415 with 14 unique members
['stratos1805', 'סדאם חוסיין ז"ל', 'MrStormagedon', 'Cardi Biton', 'rone', "יניבצ'וק", 'October', 'Detka', 'XPEZNAZ', 'NightSeeker', 'Groovy', 'xWarLord_Sharon', 'פליציק', 'OMGitsyuvalT']
In [51]:
# sessions.index = pd.CategoricalIndex(sessions.index, unique_members)
# sessions = sessions.sort_values(ascending=False)
# sessions
# sessions.plot(use_index=True, kind='box', figsize=(20, 5), title='Average Session Length', xlabel='', ylabel='Hours', rot=0, color=colormap)
# plt.show()
plt.figure(figsize=(20, 5))
sns.boxplot(x='Member Name', y=df['Time'].dt.seconds / 3600, data=df, palette=palette)
plt.xlabel('')
plt.ylabel('Hours')
plt.show()
2021-05-27T12:24:49.541009 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
In [52]:
start = min_datetime.replace(minute=0, second=0, microsecond=0)
end = max_datetime.replace(minute=0, second=0, microsecond=0) + pd.offsets.Hour(1)
attendance_data = []
date = start
while date < end:
    members = df[(df['Joined'] <= date + pd.offsets.Minute(1)) & (df['Left'] >= date)]['Member Name'].to_list()
    attendance_data.append([date] + [member in members for member in unique_members])
    date += pd.offsets.Minute(1)
att_df = pd.DataFrame(attendance_data, columns=['Date'] + unique_members)
In [53]:
axes = att_df.set_index('Date').rolling(1).mean().plot(figsize=(14, 24), yticks=[0, 1], subplots=True, sharex=True, kind='area', color=colormap)
for ax in axes:
    ax.set_yticklabels(['Disconnected', 'Connected'])
plt.subplots_adjust()
plt.show()
2021-05-27T12:25:19.619551 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
In [54]:
def part_of_day(date):
    if 0 <= date.hour < 6:
        return '00:00 - 05:59'
    elif 6 <= date.hour < 12:
        return '06:00 - 11:59'
    elif 12 <= date.hour < 18:
        return '12:00 - 17:59'
    else:
        return '18:00 - 23:59'

weekday_order = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']

daily_att = att_df.groupby(att_df['Date'].dt.day_name()).sum() / 60
daily_att.index = pd.CategoricalIndex(daily_att.index, weekday_order)
daily_att = daily_att.sort_index()
hourly_att = att_df.groupby(att_df['Date'].apply(part_of_day)).sum() / 60
all_att = att_df.sum() / 60

daily_att.plot(use_index=True, kind='bar', figsize=(20, 5), title='Daily Attendance', xlabel='Weekday', rot=0, color=colormap)
plt.show()
hourly_att.plot(use_index=True, kind='bar', figsize=(20, 5), title='Hourly Attendance', xlabel='Time', rot=0, color=colormap)
plt.show()
all_att.sort_values(ascending=False).plot(use_index=True, kind='bar', figsize=(20, 5), title='Total Attendance', xlabel=None, ylabel='Hours', rot=0, color=colormap)
plt.show()
2021-05-27T12:25:26.384457 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
2021-05-27T12:25:26.916034 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
2021-05-27T12:25:27.318957 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
In [55]:
corr = att_df.corr()
corr
Out[55]:
stratos1805 סדאם חוסיין ז"ל MrStormagedon Cardi Biton rone יניבצ'וק October Detka XPEZNAZ NightSeeker Groovy xWarLord_Sharon פליציק OMGitsyuvalT
stratos1805 1.000000 0.489795 0.298643 0.284557 0.155796 0.277462 0.347159 0.074933 0.301357 0.262684 0.089882 0.011728 0.006452 0.127911
סדאם חוסיין ז"ל 0.489795 1.000000 0.442167 0.385498 0.171153 0.323176 0.392225 0.216310 0.484823 0.353747 0.297674 0.031833 0.135472 0.127421
MrStormagedon 0.298643 0.442167 1.000000 0.243404 0.115154 0.295505 0.346059 0.292071 0.333262 0.277531 0.163399 -0.010566 0.070503 0.023907
Cardi Biton 0.284557 0.385498 0.243404 1.000000 0.133414 0.243749 0.302811 0.176608 0.226813 0.235313 0.106225 -0.000027 -0.010323 0.172302
rone 0.155796 0.171153 0.115154 0.133414 1.000000 0.318817 0.231187 -0.020987 0.238298 0.092852 0.142420 0.343828 0.019468 0.077254
יניבצ'וק 0.277462 0.323176 0.295505 0.243749 0.318817 1.000000 0.598022 -0.026996 0.235959 0.195467 0.170725 0.093068 0.157132 0.132093
October 0.347159 0.392225 0.346059 0.302811 0.231187 0.598022 1.000000 -0.033446 0.261299 0.136883 0.246936 0.021472 0.155157 0.015007
Detka 0.074933 0.216310 0.292071 0.176608 -0.020987 -0.026996 -0.033446 1.000000 0.120397 0.310288 0.057048 -0.017296 -0.017408 -0.021633
XPEZNAZ 0.301357 0.484823 0.333262 0.226813 0.238298 0.235959 0.261299 0.120397 1.000000 0.362598 0.144016 -0.028397 0.093607 0.132281
NightSeeker 0.262684 0.353747 0.277531 0.235313 0.092852 0.195467 0.136883 0.310288 0.362598 1.000000 0.136477 -0.020111 0.115247 0.018436
Groovy 0.089882 0.297674 0.163399 0.106225 0.142420 0.170725 0.246936 0.057048 0.144016 0.136477 1.000000 -0.015958 0.096486 0.026698
xWarLord_Sharon 0.011728 0.031833 -0.010566 -0.000027 0.343828 0.093068 0.021472 -0.017296 -0.028397 -0.020111 -0.015958 1.000000 -0.003936 -0.008903
פליציק 0.006452 0.135472 0.070503 -0.010323 0.019468 0.157132 0.155157 -0.017408 0.093607 0.115247 0.096486 -0.003936 1.000000 -0.008961
OMGitsyuvalT 0.127911 0.127421 0.023907 0.172302 0.077254 0.132093 0.015007 -0.021633 0.132281 0.018436 0.026698 -0.008903 -0.008961 1.000000
In [56]:
plt.figure(figsize=(20, 20))
sns.heatmap(corr, cmap='YlGnBu')
plt.xticks(rotation=0)
plt.show()
2021-05-27T12:25:27.764764 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
In [57]:
plt.figure(figsize=(20, 20))
sns.heatmap(corr[(abs(corr) > 0.3)], cmap='YlGnBu')
plt.xticks(rotation=0)
plt.show()
2021-05-27T12:25:28.181648 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/